// Copyright 2023 Ulvetanna Inc.
// Use of this source code is governed by a Apache-2.0 style license that
// can be found in the LICENSE.ulvetanna file.

#ifndef TACHYON_MATH_FINITE_FIELDS_BINARY_FIELDS_BINARY_TOWER_OPERATIONS_H_
#define TACHYON_MATH_FINITE_FIELDS_BINARY_FIELDS_BINARY_TOWER_OPERATIONS_H_

#include <stddef.h>
#include <stdint.h>

#include <optional>
#include <type_traits>

#include "tachyon/base/logging.h"

namespace tachyon::math {

// Primary template for arithmetic on binary tower field elements. It is only
// declared, never defined: the partial specializations in this file, selected
// on |F::kBits| through the |SFINAE| parameter, supply the static operations
// Mul, MulByAlpha, Square and Inverse.
template <typename T, typename SFINAE = void>
struct BinaryTowerOperations;

namespace internal {

// Returns the product of two 4-bit operands by reading a precomputed 16x16
// multiplication table.
//
// Every byte of |kPackedProducts| stores two neighbouring 4-bit products. The
// product of |lhs| and |rhs| sits at nibble position (lhs << 4) | rhs: an even
// position is the low nibble of its byte and an odd position the high nibble.
//
// Both |lhs| and |rhs| must be smaller than 16. Larger values are not
// supported: they select an unrelated entry or read past the table.
constexpr uint8_t DoBinaryMul(uint8_t lhs, uint8_t rhs) {
  // clang-format off
  constexpr uint8_t kPackedProducts[] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x10, 0x32, 0x54, 0x76, 0x98, 0xba, 0xdc, 0xfe,
    0x20, 0x13, 0xa8, 0x9b, 0xec, 0xdf, 0x64, 0x57,
    0x30, 0x21, 0xfc, 0xed, 0x74, 0x65, 0xb8, 0xa9,
    0x40, 0xc8, 0xd9, 0x51, 0xae, 0x26, 0x37, 0xbf,
    0x50, 0xfa, 0x8d, 0x27, 0x36, 0x9c, 0xeb, 0x41,
    0x60, 0xdb, 0x71, 0xca, 0x42, 0xf9, 0x53, 0xe8,
    0x70, 0xe9, 0x25, 0xbc, 0xda, 0x43, 0x8f, 0x16,
    0x80, 0x4c, 0x6e, 0xa2, 0xf7, 0x3b, 0x19, 0xd5,
    0x90, 0x7e, 0x3a, 0xd4, 0x6f, 0x81, 0xc5, 0x2b,
    0xa0, 0x5f, 0xc6, 0x39, 0x1b, 0xe4, 0x7d, 0x82,
    0xb0, 0x6d, 0x92, 0x4f, 0x83, 0x5e, 0xa1, 0x7c,
    0xc0, 0x84, 0xb7, 0xf3, 0x59, 0x1d, 0x2e, 0x6a,
    0xd0, 0xb6, 0xe3, 0x85, 0xc1, 0xa7, 0xf2, 0x94,
    0xe0, 0x97, 0x1f, 0x68, 0xb5, 0xc2, 0x4a, 0x3d,
    0xf0, 0xa5, 0x4b, 0x1e, 0x2d, 0x78, 0x96, 0xc3,
  };
  // clang-format on
  const size_t nibble_pos = (size_t{lhs} << 4) | rhs;
  const uint8_t packed = kPackedProducts[nibble_pos / 2];
  // Odd positions live in the upper half of the byte.
  const unsigned shift = (nibble_pos % 2 == 0) ? 0u : 4u;
  return (packed >> shift) & 0x0f;
}

// Returns the entry of a 256-element inverse table for |x|.
//
// The table maps 0 to 0; callers in this file reject zero before calling, so
// that entry only serves as padding. Any |uint8_t| value is a valid index.
constexpr uint8_t DoBinaryInverse(uint8_t x) {
  // clang-format off
  constexpr uint8_t kInverses[] = {
    0x00, 0x01, 0x03, 0x02, 0x06, 0x0e, 0x04, 0x0f,
    0x0d, 0x0a, 0x09, 0x0c, 0x0b, 0x08, 0x05, 0x07,
    0x14, 0x67, 0x94, 0x7b, 0x10, 0x66, 0x9e, 0x7e,
    0xd2, 0x81, 0x27, 0x4b, 0xd1, 0x8f, 0x2f, 0x42,
    0x3c, 0xe6, 0xde, 0x7c, 0xb3, 0xc1, 0x4a, 0x1a,
    0x30, 0xe9, 0xdd, 0x79, 0xb1, 0xc6, 0x43, 0x1e,
    0x28, 0xe8, 0x9d, 0xb9, 0x63, 0x39, 0x8d, 0xc2,
    0x62, 0x35, 0x83, 0xc5, 0x20, 0xe7, 0x97, 0xbb,
    0x61, 0x48, 0x1f, 0x2e, 0xac, 0xc8, 0xbc, 0x56,
    0x41, 0x60, 0x26, 0x1b, 0xcf, 0xaa, 0x5b, 0xbe,
    0xef, 0x73, 0x6d, 0x5e, 0xf7, 0x86, 0x47, 0xbd,
    0x88, 0xfc, 0xbf, 0x4e, 0x76, 0xe0, 0x53, 0x6c,
    0x49, 0x40, 0x38, 0x34, 0xe4, 0xeb, 0x15, 0x11,
    0x8b, 0x85, 0xaf, 0xa9, 0x5f, 0x52, 0x98, 0x92,
    0xfb, 0xb5, 0xee, 0x51, 0xb7, 0xf0, 0x5c, 0xe1,
    0xdc, 0x2b, 0x95, 0x13, 0x23, 0xdf, 0x17, 0x9f,
    0xd3, 0x19, 0xc4, 0x3a, 0x8a, 0x69, 0x55, 0xf6,
    0x58, 0xfd, 0x84, 0x68, 0xc3, 0x36, 0xd0, 0x1d,
    0xa6, 0xf3, 0x6f, 0x99, 0x12, 0x7a, 0xba, 0x3e,
    0x6e, 0x93, 0xa0, 0xf8, 0xb8, 0x32, 0x16, 0x7f,
    0x9a, 0xf9, 0xe2, 0xdb, 0xed, 0xd8, 0x90, 0xf2,
    0xae, 0x6b, 0x4d, 0xce, 0x44, 0xc9, 0xa8, 0x6a,
    0xc7, 0x2c, 0xc0, 0x24, 0xfa, 0x71, 0xf1, 0x74,
    0x9c, 0x33, 0x96, 0x3f, 0x46, 0x57, 0x4f, 0x5a,
    0xb2, 0x25, 0x37, 0x8c, 0x82, 0x3b, 0x2d, 0xb0,
    0x45, 0xad, 0xd7, 0xff, 0xf4, 0xd4, 0xab, 0x4c,
    0x8e, 0x1c, 0x18, 0x80, 0xcd, 0xf5, 0xfe, 0xca,
    0xa5, 0xec, 0xe3, 0xa3, 0x78, 0x2a, 0x22, 0x7d,
    0x5d, 0x77, 0xa2, 0xda, 0x64, 0xea, 0x21, 0x3d,
    0x31, 0x29, 0xe5, 0x65, 0xd9, 0xa4, 0x72, 0x50,
    0x75, 0xb6, 0xa7, 0x91, 0xcc, 0xd5, 0x87, 0x54,
    0x9b, 0xa1, 0xb4, 0x70, 0x59, 0x89, 0xd6, 0xcb,
  };
  // clang-format on
  const size_t index = x;
  return kInverses[index];
}

}  // namespace internal

template <typename F>
struct BinaryTowerOperations<F, std::enable_if_t<F::kBits == 1>> {
  constexpr static F Mul(F lhs, F rhs) { return F(lhs.value() & rhs.value()); }

  constexpr static F MulByAlpha(F x) { return x; }

  constexpr static F Square(F x) { return x; }

  constexpr static std::optional<F> Inverse(F x) {
    if (UNLIKELY(x.IsZero())) {
      LOG_IF_NOT_GPU(ERROR) << "Inverse of zero attempted";
      return std::nullopt;
    }
    return x;
  }
};

template <typename F>
struct BinaryTowerOperations<F, std::enable_if_t<F::kBits == 2>> {
  constexpr static F Mul(F lhs, F rhs) {
    return F(internal::DoBinaryMul(lhs.value(), rhs.value()));
  }

  constexpr static F MulByAlpha(F x) { return Mul(x, F(2)); }

  constexpr static F Square(F x) { return Mul(x, x); }

  constexpr static std::optional<F> Inverse(F x) {
    if (UNLIKELY(x.IsZero())) {
      LOG_IF_NOT_GPU(ERROR) << "Inverse of zero attempted";
      return std::nullopt;
    }
    return F(internal::DoBinaryInverse(x.value()));
  }
};

template <typename F>
struct BinaryTowerOperations<F, std::enable_if_t<F::kBits == 4>> {
  constexpr static F Mul(F lhs, F rhs) {
    return F(internal::DoBinaryMul(lhs.value(), rhs.value()));
  }

  constexpr static F MulByAlpha(F x) { return Mul(x, F(4)); }

  constexpr static F Square(F x) { return Mul(x, x); }

  constexpr static std::optional<F> Inverse(F x) {
    if (UNLIKELY(x.IsZero())) {
      LOG_IF_NOT_GPU(ERROR) << "Inverse of zero attempted";
      return std::nullopt;
    }
    return F(internal::DoBinaryInverse(x.value()));
  }
};

template <typename F>
struct BinaryTowerOperations<F, std::enable_if_t<F::kBits == 8>> {
  constexpr static F Mul(F lhs, F rhs) {
    // clang-format off
    constexpr uint8_t kExpTable[] = {
      0x1,  0x13, 0x43, 0x66, 0xab, 0x8c, 0x60, 0xc6,
      0x91, 0xca, 0x59, 0xb2, 0x6a, 0x63, 0xf4, 0x53,
      0x17, 0x0f, 0xfa, 0xba, 0xee, 0x87, 0xd6, 0xe0,
      0x6e, 0x2f, 0x68, 0x42, 0x75, 0xe8, 0xea, 0xcb,
      0x4a, 0xf1, 0x0c, 0xc8, 0x78, 0x33, 0xd1, 0x9e,
      0x30, 0xe3, 0x5c, 0xed, 0xb5, 0x14, 0x3d, 0x38,
      0x67, 0xb8, 0xcf, 0x06, 0x6d, 0x1d, 0xaa, 0x9f,
      0x23, 0xa0, 0x3a, 0x46, 0x39, 0x74, 0xfb, 0xa9,
      0xad, 0xe1, 0x7d, 0x6c, 0x0e, 0xe9, 0xf9, 0x88,
      0x2c, 0x5a, 0x80, 0xa8, 0xbe, 0xa2, 0x1b, 0xc7,
      0x82, 0x89, 0x3f, 0x19, 0xe6, 0x03, 0x32, 0xc2,
      0xdd, 0x56, 0x48, 0xd0, 0x8d, 0x73, 0x85, 0xf7,
      0x61, 0xd5, 0xd2, 0xac, 0xf2, 0x3e, 0x0a, 0xa5,
      0x65, 0x99, 0x4e, 0xbd, 0x90, 0xd9, 0x1a, 0xd4,
      0xc1, 0xef, 0x94, 0x95, 0x86, 0xc5, 0xa3, 0x08,
      0x84, 0xe4, 0x22, 0xb3, 0x79, 0x20, 0x92, 0xf8,
      0x9b, 0x6f, 0x3c, 0x2b, 0x24, 0xde, 0x64, 0x8a,
      0xd,  0xdb, 0x3b, 0x55, 0x7a, 0x12, 0x50, 0x25,
      0xcd, 0x27, 0xec, 0xa6, 0x57, 0x5b, 0x93, 0xeb,
      0xd8, 0x09, 0x97, 0xa7, 0x44, 0x18, 0xf5, 0x40,
      0x54, 0x69, 0x51, 0x36, 0x8e, 0x41, 0x47, 0x2a,
      0x37, 0x9d, 0x02, 0x21, 0x81, 0xbb, 0xfd, 0xc4,
      0xb0, 0x4b, 0xe2, 0x4f, 0xae, 0xd3, 0xbf, 0xb1,
      0x58, 0xa1, 0x29, 0x05, 0x5f, 0xdf, 0x77, 0xc9,
      0x6b, 0x70, 0xb7, 0x35, 0xbc, 0x83, 0x9a, 0x7c,
      0x7f, 0x4d, 0x8f, 0x52, 0x04, 0x4c, 0x9c, 0x11,
      0x62, 0xe7, 0x10, 0x71, 0xa4, 0x76, 0xda, 0x28,
      0x16, 0x1c, 0xb9, 0xdc, 0x45, 0x0b, 0xb6, 0x26,
      0xff, 0xe5, 0x31, 0xf0, 0x1f, 0x8b, 0x1e, 0x98,
      0x5d, 0xfe, 0xf6, 0x72, 0x96, 0xb4, 0x07, 0x7e,
      0x5e, 0xcc, 0x34, 0xaf, 0xc0, 0xfc, 0xd7, 0xf3,
      0x2d, 0x49, 0xc3, 0xce, 0x15, 0x2e, 0x7b, 0x00,
    };
    constexpr uint8_t kLogTable[] = {
      0x00, 0x00, 0xaa, 0x55, 0xcc, 0xbb, 0x33, 0xee,
      0x77, 0x99, 0x66, 0xdd, 0x22, 0x88, 0x44, 0x11,
      0xd2, 0xcf, 0x8d, 0x01, 0x2d, 0xfc, 0xd8, 0x10,
      0x9d, 0x53, 0x6e, 0x4e, 0xd9, 0x35, 0xe6, 0xe4,
      0x7d, 0xab, 0x7a, 0x38, 0x84, 0x8f, 0xdf, 0x91,
      0xd7, 0xba, 0xa7, 0x83, 0x48, 0xf8, 0xfd, 0x19,
      0x28, 0xe2, 0x56, 0x25, 0xf2, 0xc3, 0xa3, 0xa8,
      0x2f, 0x3c, 0x3a, 0x8a, 0x82, 0x2e, 0x65, 0x52,
      0x9f, 0xa5, 0x1b, 0x02, 0x9c, 0xdc, 0x3b, 0xa6,
      0x5a, 0xf9, 0x20, 0xb1, 0xcd, 0xc9, 0x6a, 0xb3,
      0x8e, 0xa2, 0xcb, 0x0f, 0xa0, 0x8b, 0x59, 0x94,
      0xb8, 0x0a, 0x49, 0x95, 0x2a, 0xe8, 0xf0, 0xbc,
      0x06, 0x60, 0xd0, 0x0d, 0x86, 0x68, 0x03, 0x30,
      0x1a, 0xa1, 0x0c, 0xc0, 0x43, 0x34, 0x18, 0x81,
      0xc1, 0xd3, 0xeb, 0x5d, 0x3d, 0x1c, 0xd5, 0xbe,
      0x24, 0x7c, 0x8c, 0xfe, 0xc7, 0x42, 0xef, 0xc8,
      0x4a, 0xac, 0x50, 0xc5, 0x78, 0x5e, 0x74, 0x15,
      0x47, 0x51, 0x87, 0xe5, 0x05, 0x5c, 0xa4, 0xca,
      0x6c, 0x08, 0x7e, 0x96, 0x72, 0x73, 0xec, 0x9a,
      0xe7, 0x69, 0xc6, 0x80, 0xce, 0xa9, 0x27, 0x37,
      0x39, 0xb9, 0x4d, 0x76, 0xd4, 0x67, 0x93, 0x9b,
      0x4b, 0x3f, 0x36, 0x04, 0x63, 0x40, 0xb4, 0xf3,
      0xb0, 0xb7, 0x0b, 0x7b, 0xed, 0x2c, 0xde, 0xc2,
      0x31, 0xda, 0x13, 0xad, 0xc4, 0x6b, 0x4c, 0xb6,
      0xf4, 0x70, 0x57, 0xfa, 0xaf, 0x75, 0x07, 0x4f,
      0x23, 0xbf, 0x09, 0x1f, 0xf1, 0x90, 0xfb, 0x32,
      0x5b, 0x26, 0x62, 0xb5, 0x6f, 0x61, 0x16, 0xf6,
      0x98, 0x6d, 0xd6, 0x89, 0xdb, 0x58, 0x85, 0xbd,
      0x17, 0x41, 0xb2, 0x29, 0x79, 0xe1, 0x54, 0xd1,
      0x1d, 0x45, 0x1e, 0x97, 0x92, 0x2b, 0x14, 0x71,
      0xe3, 0x21, 0x64, 0xf7, 0x0e, 0x9e, 0xea, 0x5f,
      0x7f, 0x46, 0x12, 0x3e, 0xf5, 0xae, 0xe9, 0xe0,
    };
    // clang-format on
    if (lhs.IsZero() || rhs.IsZero()) return F::Zero();

    // Safety: |log_table_index| is smaller than 255 because:
    // - all values in |kLogTable| do not exceed 254
    // - sum of two values do not exceed 254*2
    // - the previous line reduces |log_table_index| by 255 if it is
    //   bigger than 254
    size_t log_table_index =
        size_t{kLogTable[lhs.value()]} + kLogTable[rhs.value()];
    size_t exp_table_index =
        log_table_index > 254 ? log_table_index - 255 : log_table_index;
    return F(kExpTable[exp_table_index]);
  }

  constexpr static F MulByAlpha(F x) {
    // clang-format off
    constexpr uint8_t kTable[] = {
      0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
      0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0,
      0x41, 0x51, 0x61, 0x71, 0x01, 0x11, 0x21, 0x31,
      0xc1, 0xd1, 0xe1, 0xf1, 0x81, 0x91, 0xa1, 0xb1,
      0x82, 0x92, 0xa2, 0xb2, 0xc2, 0xd2, 0xe2, 0xf2,
      0x02, 0x12, 0x22, 0x32, 0x42, 0x52, 0x62, 0x72,
      0xc3, 0xd3, 0xe3, 0xf3, 0x83, 0x93, 0xa3, 0xb3,
      0x43, 0x53, 0x63, 0x73, 0x03, 0x13, 0x23, 0x33,
      0x94, 0x84, 0xb4, 0xa4, 0xd4, 0xc4, 0xf4, 0xe4,
      0x14, 0x04, 0x34, 0x24, 0x54, 0x44, 0x74, 0x64,
      0xd5, 0xc5, 0xf5, 0xe5, 0x95, 0x85, 0xb5, 0xa5,
      0x55, 0x45, 0x75, 0x65, 0x15, 0x05, 0x35, 0x25,
      0x16, 0x06, 0x36, 0x26, 0x56, 0x46, 0x76, 0x66,
      0x96, 0x86, 0xb6, 0xa6, 0xd6, 0xc6, 0xf6, 0xe6,
      0x57, 0x47, 0x77, 0x67, 0x17, 0x07, 0x37, 0x27,
      0xd7, 0xc7, 0xf7, 0xe7, 0x97, 0x87, 0xb7, 0xa7,
      0xe8, 0xf8, 0xc8, 0xd8, 0xa8, 0xb8, 0x88, 0x98,
      0x68, 0x78, 0x48, 0x58, 0x28, 0x38, 0x08, 0x18,
      0xa9, 0xb9, 0x89, 0x99, 0xe9, 0xf9, 0xc9, 0xd9,
      0x29, 0x39, 0x09, 0x19, 0x69, 0x79, 0x49, 0x59,
      0x6a, 0x7a, 0x4a, 0x5a, 0x2a, 0x3a, 0x0a, 0x1a,
      0xea, 0xfa, 0xca, 0xda, 0xaa, 0xba, 0x8a, 0x9a,
      0x2b, 0x3b, 0x0b, 0x1b, 0x6b, 0x7b, 0x4b, 0x5b,
      0xab, 0xbb, 0x8b, 0x9b, 0xeb, 0xfb, 0xcb, 0xdb,
      0x7c, 0x6c, 0x5c, 0x4c, 0x3c, 0x2c, 0x1c, 0x0c,
      0xfc, 0xec, 0xdc, 0xcc, 0xbc, 0xac, 0x9c, 0x8c,
      0x3d, 0x2d, 0x1d, 0x0d, 0x7d, 0x6d, 0x5d, 0x4d,
      0xbd, 0xad, 0x9d, 0x8d, 0xfd, 0xed, 0xdd, 0xcd,
      0xfe, 0xee, 0xde, 0xce, 0xbe, 0xae, 0x9e, 0x8e,
      0x7e, 0x6e, 0x5e, 0x4e, 0x3e, 0x2e, 0x1e, 0x0e,
      0xbf, 0xaf, 0x9f, 0x8f, 0xff, 0xef, 0xdf, 0xcf,
      0x3f, 0x2f, 0x1f, 0x0f, 0x7f, 0x6f, 0x5f, 0x4f,
    };
    // clang-format on
    return F(kTable[x.value()]);
  }

  constexpr static F Square(F x) {
    // clang-format off
    constexpr uint8_t kTable[] = {
      0x00, 0x01, 0x03, 0x02, 0x09, 0x08, 0x0a, 0x0b,
      0x07, 0x06, 0x04, 0x05, 0x0e, 0x0f, 0x0d, 0x0c,
      0x41, 0x40, 0x42, 0x43, 0x48, 0x49, 0x4b, 0x4a,
      0x46, 0x47, 0x45, 0x44, 0x4f, 0x4e, 0x4c, 0x4d,
      0xc3, 0xc2, 0xc0, 0xc1, 0xca, 0xcb, 0xc9, 0xc8,
      0xc4, 0xc5, 0xc7, 0xc6, 0xcd, 0xcc, 0xce, 0xcf,
      0x82, 0x83, 0x81, 0x80, 0x8b, 0x8a, 0x88, 0x89,
      0x85, 0x84, 0x86, 0x87, 0x8c, 0x8d, 0x8f, 0x8e,
      0xa9, 0xa8, 0xaa, 0xab, 0xa0, 0xa1, 0xa3, 0xa2,
      0xae, 0xaf, 0xad, 0xac, 0xa7, 0xa6, 0xa4, 0xa5,
      0xe8, 0xe9, 0xeb, 0xea, 0xe1, 0xe0, 0xe2, 0xe3,
      0xef, 0xee, 0xec, 0xed, 0xe6, 0xe7, 0xe5, 0xe4,
      0x6a, 0x6b, 0x69, 0x68, 0x63, 0x62, 0x60, 0x61,
      0x6d, 0x6c, 0x6e, 0x6f, 0x64, 0x65, 0x67, 0x66,
      0x2b, 0x2a, 0x28, 0x29, 0x22, 0x23, 0x21, 0x20,
      0x2c, 0x2d, 0x2f, 0x2e, 0x25, 0x24, 0x26, 0x27,
      0x57, 0x56, 0x54, 0x55, 0x5e, 0x5f, 0x5d, 0x5c,
      0x50, 0x51, 0x53, 0x52, 0x59, 0x58, 0x5a, 0x5b,
      0x16, 0x17, 0x15, 0x14, 0x1f, 0x1e, 0x1c, 0x1d,
      0x11, 0x10, 0x12, 0x13, 0x18, 0x19, 0x1b, 0x1a,
      0x94, 0x95, 0x97, 0x96, 0x9d, 0x9c, 0x9e, 0x9f,
      0x93, 0x92, 0x90, 0x91, 0x9a, 0x9b, 0x99, 0x98,
      0xd5, 0xd4, 0xd6, 0xd7, 0xdc, 0xdd, 0xdf, 0xde,
      0xd2, 0xd3, 0xd1, 0xd0, 0xdb, 0xda, 0xd8, 0xd9,
      0xfe, 0xff, 0xfd, 0xfc, 0xf7, 0xf6, 0xf4, 0xf5,
      0xf9, 0xf8, 0xfa, 0xfb, 0xf0, 0xf1, 0xf3, 0xf2,
      0xbf, 0xbe, 0xbc, 0xbd, 0xb6, 0xb7, 0xb5, 0xb4,
      0xb8, 0xb9, 0xbb, 0xba, 0xb1, 0xb0, 0xb2, 0xb3,
      0x3d, 0x3c, 0x3e, 0x3f, 0x34, 0x35, 0x37, 0x36,
      0x3a, 0x3b, 0x39, 0x38, 0x33, 0x32, 0x30, 0x31,
      0x7c, 0x7d, 0x7f, 0x7e, 0x75, 0x74, 0x76, 0x77,
      0x7b, 0x7a, 0x78, 0x79, 0x72, 0x73, 0x71, 0x70,
    };
    // clang-format on
    return F(kTable[x.value()]);
  }

  constexpr static std::optional<F> Inverse(F x) {
    if (UNLIKELY(x.IsZero())) {
      LOG_IF_NOT_GPU(ERROR) << "Inverse of zero attempted";
      return std::nullopt;
    }
    return F(internal::DoBinaryInverse(x.value()));
  }
};

// Arithmetic for fields of 16 bits or more. An element is split into two
// |F::SubField| halves (x₀, x₁) standing for x₀ + x₁X, and every operation is
// expressed through subfield arithmetic using the reduction X² = αX + 1, where
// multiplication by α is the subfield's MulByAlpha.
template <typename F>
struct BinaryTowerOperations<F, std::enable_if_t<(F::kBits >= 16)>> {
  using SubField = typename F::SubField;

  // Karatsuba-style product using three subfield multiplications.
  constexpr static F Mul(F lhs, F rhs) {
    // clang-format off
    // Writing a = lhs and b = rhs:
    //   (a₀ + a₁X)(b₀ + b₁X)
    // = a₀b₀ + (a₀b₁ + a₁b₀)X + a₁b₁X²
    // = a₀b₀ + (a₀b₁ + a₁b₀)X + a₁b₁(αX + 1)
    // = (a₀b₀ + a₁b₁) + (a₀b₁ + a₁b₀ + a₁b₁α)X
    // The cross term a₀b₁ + a₁b₀ is obtained as (a₀ + a₁)(b₀ + b₁) - (a₀b₀ + a₁b₁).
    // clang-format on
    auto [a_lo, a_hi] = lhs.Decompose();
    auto [b_lo, b_hi] = rhs.Decompose();
    SubField lo_product = a_lo * b_lo;
    SubField hi_product = a_hi * b_hi;
    SubField constant_term = lo_product + hi_product;
    SubField cross_term = (a_lo + a_hi) * (b_lo + b_hi) - constant_term;
    SubField hi_product_alpha =
        BinaryTowerOperations<SubField>::MulByAlpha(hi_product);
    return F::Compose(constant_term, cross_term + hi_product_alpha);
  }

  // Multiplies |x| by X, the generator of this extension level.
  constexpr static F MulByAlpha(F x) {
    //   (x₀ + x₁X)X
    // = x₀X + x₁X²
    // = x₀X + x₁(αX + 1)
    // = x₁ + (x₀ + x₁α)X
    auto [lo, hi] = x.Decompose();
    SubField hi_alpha = BinaryTowerOperations<SubField>::MulByAlpha(hi);
    return F::Compose(hi, lo + hi_alpha);
  }

  // Squares |x|; the mixed term vanishes because 2 = 0 in characteristic 2.
  constexpr static F Square(F x) {
    //   (x₀ + x₁X)²
    // = x₀² + (2x₀x₁)X + x₁²X²
    // = x₀² + x₁²X²
    // = x₀² + x₁²(αX + 1)
    // = (x₀² + x₁²) + (x₁²α)X
    auto [lo, hi] = x.Decompose();
    SubField lo_sq = lo.Square();
    SubField hi_sq = hi.Square();
    SubField hi_sq_alpha = BinaryTowerOperations<SubField>::MulByAlpha(hi_sq);
    return F::Compose(lo_sq + hi_sq, hi_sq_alpha);
  }

  // Inverts |x| with a single subfield inversion. Returns std::nullopt when
  // the subfield inversion of δ fails.
  constexpr static std::optional<F> Inverse(F x) {
    // Multiply x by its conjugate (x₀ + x₁α) + x₁X:
    //   (x₀ + x₁X)(x₀ + x₁α + x₁X)
    // = x₀² + x₀x₁α + (x₀x₁)X + (x₀x₁)X + (x₁²α)X + x₁²X²
    // = x₀² + x₀x₁α + x₁²(αX + X²)
    // = x₀² + x₀x₁α + x₁²
    // = x₀(x₀ + x₁α) + x₁²
    // = δ, which lies in the subfield.
    // Hence (x₀ + x₁X)⁻¹ = δ⁻¹((x₀ + x₁α) + x₁X).
    auto [lo, hi] = x.Decompose();
    SubField conj_lo = lo + BinaryTowerOperations<SubField>::MulByAlpha(hi);
    SubField delta = lo * conj_lo + hi.Square();
    std::optional<SubField> delta_inv = delta.Inverse();
    if (LIKELY(delta_inv)) {
      SubField result_lo = *delta_inv * conj_lo;
      SubField result_hi = *delta_inv * hi;
      return F::Compose(result_lo, result_hi);
    }
    return std::nullopt;
  }
};

}  // namespace tachyon::math

#endif  // TACHYON_MATH_FINITE_FIELDS_BINARY_FIELDS_BINARY_TOWER_OPERATIONS_H_
